#############################################################
## Replication file for Ban, Palmer and Schneer 2019       ##
## file: format_fe.R                                       ##
## date: 5/16/2019                                         ##
#############################################################

#install relevant R packages
#install.packages(c('foreign','data.table','haven','stringdist'))

# Clear the global environment so objects left over from an earlier session
# cannot leak into this run.
rm(list=ls())

#Load packages
# library() stops with an error when a package is missing; require() would only
# warn and return FALSE, postponing the failure to the first call that needs it.
library(foreign)     # read.dta
library(data.table)  # setDT, fread, :=, joins, rbindlist
library(haven)       # read_stata, read_dta, write_dta
library(stringdist)  # stringdist

#define other functions
# splitit: return the n-th piece of each element of `x` after splitting it on
# `splitchar`.
#   x         vector; coerced with as.character()
#   splitchar split pattern passed to strsplit() (a regular expression by default)
#   n         1-based index of the piece to keep
# Returns a character vector with one entry per element of `x`. Elements with
# fewer than `n` pieces (including NA and "") give NA rather than a
# "subscript out of bounds" error, and zero-length input gives character(0)
# rather than the empty list sapply() would return.
splitit <- function(x, splitchar, n) {
  pieces <- strsplit(as.character(x), splitchar)
  vapply(
    pieces,
    function(p) if (length(p) >= n) p[[n]] else NA_character_,
    character(1)
  )
}

#set working directory
# NOTE: absolute path from the original replication environment; every relative
# path used below ("inputs/...", "usr_gen/...") is resolved against it.
setwd("/nfs/home/B/bschneer/shared_space/bschneer/lobbying_replication")

#import and format revolving door data
rev.door <- read_stata("inputs/rev_door_dstats.dta"); setDT(rev.door)
rev.door.match <- read_stata("inputs/rev_door_match.dta"); setDT(rev.door.match)

# Keep rows flagged staffer, congress_expol or congress_com, then drop rows with
# rev_door==0 or is_firm==0. Rows where a condition evaluates to NA are dropped
# as well, because data.table treats NA in `i` as FALSE.
rev.door <- rev.door[(staffer==1|congress_expol==1|congress_com==1)][!(rev_door==0|is_firm==0)]

# One row per distinct combination of the retained columns; id_lobbyist and
# lobbyist_stan are renamed to lobbyist_id and lobbyist.
rev.door <- unique(rev.door[,.(lobbyist_id=id_lobbyist,is_firm,staffer,congress_com,congress_current,congress_expol,congress_pol_98,lobbyist=lobbyist_stan)])

# Ids starting with "Y": keep the first 11 characters and append "L".
rev.door[substr(lobbyist_id,1,1)=="Y",lobbyist_id:=paste0(substr(lobbyist_id,1,11),"L")]

# Look up each name in rev.door.match via lobbyist_name2. After the join,
# `lobbyist` is the value from rev.door.match and `i.lobbyist` the original
# name; fall back to the original wherever the looked-up value is NA.
rev.door[,lobbyist_name2:=lobbyist]

rev.door <- rev.door.match[rev.door,on='lobbyist_name2']

rev.door[is.na(lobbyist),lobbyist:=i.lobbyist]

rev.door[,lobbyist:=gsub(",,",",",lobbyist)]
# Names with no comma but with a period: use the period(s) as the separator.
# fixed=TRUE is required here. As a regular expression "." matches any
# character, which made the test true for every non-empty name and made gsub()
# replace every character of the name with a comma.
rev.door[!grepl(",",lobbyist,fixed=TRUE) & grepl(".",lobbyist,fixed=TRUE),lobbyist:=gsub(".",",",lobbyist,fixed=TRUE)]

rev.door[,c("lobbyist_name2","i.lobbyist"):=NULL]

# lname / fname are the first and second comma-separated pieces of the name.
# Names with fewer pieces (no comma and no period) get NA for the missing piece
# instead of raising "subscript out of bounds".
rev.door[,c("lname","fname"):={
  name.parts <- strsplit(as.character(lobbyist),",")
  nth <- function(n) vapply(name.parts,function(p) if (length(p)>=n) p[[n]] else NA_character_,character(1))
  list(nth(1),nth(2))
}]

# Running row id used by the match stages below; seq_len() is safe on 0 rows.
rev.door[,id.a:=seq_len(.N)]


#Import lobbyist data
# Read the lobbyist-level file and upper-case the name column in place.
lobbyist.final.all <- fread('usr_gen/lobbyist_final_all.csv')
lobbyist.final.all[, `:=`(lobbyist = toupper(lobbyist))]

#prepare to match based on last name
# lname / fname: first and second comma-separated pieces of the (upper-cased) name.
# id.b: running row number identifying each row of this table in the match stages.
lobbyist.final.all[
  ,
  c("lname", "fname", "id.b") := list(
    splitit(lobbyist, ",", 1),
    splitit(lobbyist, ",", 2),
    1:.N
  )
]


# Stage 1: attach the rev.door id and flag columns to every row of
# lobbyist.final.all by lobbyist_id. Rows with no counterpart get NA id.a.
lobbyist.final.1 <- rev.door[
  ,
  c('id.a','lobbyist_id','is_firm','staffer','congress_com','congress_current','congress_expol','congress_pol_98'),
  with=FALSE
][lobbyist.final.all, on='lobbyist_id']

#Match attempt
# rev.door rows already used by stage 1
id.a.matched <- lobbyist.final.1[!is.na(id.a), id.a]

# Stage 2: the rows still unmatched on each side are joined on the exact name
# string in `lobbyist`.
lobbyist.final.2 <- lobbyist.final.all[id.b %in% lobbyist.final.1[is.na(id.a)]$id.b]

rev.door.2 <- rev.door[!(id.a %in% id.a.matched)]

lobbyist.final.2 <- rev.door.2[lobbyist.final.2, on='lobbyist']


#Another Match attempt
# Add the rev.door rows used by stage 2 to the running list of matched ids.
id.a.matched <- c(id.a.matched, lobbyist.final.2[!is.na(id.a), id.a])

# Stage 3 works on whatever is still unmatched on both sides.
lobbyist.final.3 <- lobbyist.final.all[id.b %in% lobbyist.final.2[is.na(id.a)]$id.b]

rev.door.3 <- rev.door[!(id.a %in% id.a.matched)]

# Candidate pairs share the last name and the first letter of the first name.
rev.door.3[, `:=`(flfname = substr(fname, 1, 1))]
lobbyist.final.3[, `:=`(flfname = substr(fname, 1, 1))]

lobbyist.final.3 <- rev.door.3[
  lobbyist.final.3,
  on=c('lname','flfname'),
  allow.cartesian=TRUE
]

#Fuzzy match
# Score each pair by the distance between the two first names
# (stringdist method 'jw' with prefix weight p=0).
lobbyist.final.3[, `:=`(match.score = stringdist(fname, i.fname, method='jw', p=0))]

# Keep rows with no rev.door candidate, plus candidate pairs scoring <= 0.21.
lobbyist.final.3 <- lobbyist.final.3[is.na(id.a) | (!is.na(id.b) & match.score<=0.21)]

# For each rev.door row (id.a) keep only its lowest-scoring pair; rows without
# a candidate are carried along unchanged.
lobbyist.final.3 <- rbind(
  lobbyist.final.3[is.na(id.a)],
  lobbyist.final.3[!is.na(id.a), .SD[which.min(match.score)], keyby=id.a]
)

#Final matched data
# Stack the three match stages (fill=T pads columns absent from a stage with NA),
# keep only the columns named in lobbyist.final.all or rev.door, and drop rows
# whose lobbyist_id is NA.
lobbyist.final <- rbind(lobbyist.final.1,lobbyist.final.2,lobbyist.final.3,fill=T)[,unique(c(names(lobbyist.final.all),names(rev.door))),with=F][!is.na(lobbyist_id)]

# Sort (and key) by id.b, then id.a; the next two steps rely on this order.
setkey(lobbyist.final,id.b,id.a)

# Within each id.b, overwrite lobbyist_id with the value from that group's
# first row in the order set above.
lobbyist.final[,lobbyist_id:=.SD[,lobbyist_id[1]],by=id.b]

# Collapse to one row per id.b by keeping the last row of each group.
# NOTE(review): presumably rows with NA id.a sort first, so a matched row
# (non-NA id.a) is the one retained whenever one exists -- confirm.
lobbyist.final <- lobbyist.final[,.SD[.N],by=id.b]

# Drop the row ids and name pieces that were used for matching.
lobbyist.final[,`:=`(id.b=NULL,id.a=NULL,lname=NULL,fname=NULL)]

# Move lobbyist_id, lobbyist and period to the front; the remaining columns
# keep their relative order.
setcolorder(lobbyist.final,c('lobbyist_id','lobbyist','period'))

#create supplementary file to merge onto other versions of the data

# lobbyist_id together with the revolving-door flag columns.
rev.door0 <- lobbyist.final[,c('lobbyist_id','is_firm','staffer','congress_com','congress_current','congress_expol','congress_pol_98')]

# Independent copy under a separate name; the name lobbyist.final is reassigned
# to a list further down.
lobbyist_final_all <- copy(lobbyist.final)


#Import lobbyist FEs
# One table per period definition. "all" is the matched table built above; each
# of the other files is read from usr_gen/ and gets the rev.door0 columns
# attached by lobbyist_id.
lobbyist.final <- c(
  list(all=lobbyist_final_all),
  lapply(
    c(
      hilo='usr_gen/lobbyist_final_hilo.csv',
      yearly='usr_gen/lobbyist_final_yearly.csv',
      `50`='usr_gen/lobbyist_final_50.csv',
      `75`='usr_gen/lobbyist_final_75.csv'
    ),
    function(csv.path) rev.door0[fread(csv.path),on='lobbyist_id']
  )
)


f1 <- read.dta("inputs/lobbyists.dta")
setDT(f1)

# Number of distinct trans_id values per lobbyist and year, for 2000-2014 only.
n.yr <- f1[
  year %in% 2000:2014,
  .(lobbying.reports=length(unique(trans_id))),
  by=.(lobbyist_id,year)
]

#Create time period data
# Each entry maps the years 2000-2014 to a period code under one period definition.
time.periods <- list(
  all=data.table(year=2000:2014,period=1),
  hilo=data.table(year=2000:2014,period=c(1,2,2,2,1,1,1,1,2,2,2,2,2,2,1)),
  yearly=data.table(year=2000:2014,period=2000:2014),
  `50`=data.table(year=2000:2014,period=c(1,2,2,2,3,3,3,3,4,4,4,4,4,4,5)),
  `75`=data.table(year=2000:2014,period=c(1,2,2,2,3,3,3,3,4,4,4,4,4,5,5))
)

# Untouched copy of the year-to-period maps; time.periods itself is overwritten
# in the merge loop further down.
time.periods.hold <- copy(time.periods)

#Import candidate data
# Senate and House candidate files as data.tables; the Senate table also gets a
# constant dist column equal to 1.
candidates <- list(
  sen=setDT(read_dta('inputs/matched_SenateCandidates_Vote_LobbyistIDs_v5.dta'))[,dist:=1],
  house=setDT(read_dta('inputs/matched_HouseCandidates_Vote_LobbyistIDs_v5.dta'))
)

#ID winners
candidates <- lapply(candidates, function(cand) {
  # winner: 1 when vote_share_cand is at least 0.5, else 0
  cand[, winner := as.numeric(vote_share_cand>=0.5)]
  setkey(cand, candidate_ext_id, year)

  # everwon: largest winner value across all rows of the same candidate (NAs ignored)
  cand[, everwon := max(winner, na.rm=TRUE), by=candidate_ext_id]

  setnames(cand, 'lobbyist_ext_id', 'lobbyist_id')

  # keep only the rows flagged first_race==1
  cand[first_race==1]
})

#Save candidates file for RD later
save(candidates,file="usr_gen/candidates.RData")

# Pool both chambers, keep rows with a non-empty lobbyist_id whose candidate ever
# won, and record per lobbyist_id whether any such row has office "H" / "S".
candidates <- rbindlist(candidates, fill=TRUE)[
  lobbyist_id!="" & everwon==1,
  .(house=as.numeric(sum(office=="H")>0), sen=as.numeric(sum(office=="S")>0)),
  by=lobbyist_id
]


#Prep uncertainty data

EPU0 <- read_stata("inputs/EPU_index.dta")
setDT(EPU0)

# For every period definition: attach the period code to each EPU0 row by year,
# drop rows whose year has no period, label each period with its first and last
# year, and average the three index columns within period.
EPU <- lapply(time.periods.hold, function(year.map) {
  epu <- year.map[EPU0, on='year'][!is.na(period)]
  epu[, period.label := paste(min(year),"--",max(year)), by=period]
  epu[
    ,
    lapply(.SD, mean),
    by=.(period, period.label),
    .SDcols=c("three_component_index","news_epu","news_epu_12ma")
  ]
})

#Merge by time period
for (item in names(time.periods)) {

  print(item)

  # years observed and total lobbying reports per lobbyist and period
  time.periods[[item]] <- time.periods[[item]][n.yr, on='year'][
    ,
    .(n_years=.N, lobbying.reports=sum(lobbying.reports, na.rm=TRUE)),
    by=.(lobbyist_id, period)
  ]

  # attach the period counts, then the period-level EPU averages
  merged <- time.periods[[item]][lobbyist.final[[item]], on=c('lobbyist_id','period')]
  merged <- EPU[[item]][merged, on="period"]

  # dots in column names become underscores
  setnames(merged, names(merged), gsub("\\.", "_", names(merged)))

  # attach the house / sen indicators by lobbyist_id
  merged <- candidates[merged, on='lobbyist_id']

  #create more outcome variables
  merged[, `:=`(
    amount_cy=amount/lobbying_reports,
    amount_per_lobbyist_cy=amount_per_lobbyist/lobbying_reports,
    amount_yrly=amount/n_years,
    amount_weighted_lim_yrly=amount_weighted_lim/n_years,
    amount_per_lobbyist_yrly=amount_per_lobbyist/n_years
  )]
  merged[, c("amount_lim","amount_per_lobbyist_lim","amount_weighted","i_lobbying_reports") := NULL]

  setnames(merged, c("FE","FE_lim"), c("fe","fe_lim"))

  # lobbyists absent from the candidate / revolving-door tables get 0 flags
  merged[is.na(house), house := 0]
  merged[is.na(sen), sen := 0]
  merged[is.na(staffer), staffer := 0]

  lobbyist.final[[item]] <- merged

  #Save data
  write_dta(merged, paste0("usr_gen/lobby_outcomes_v3_", item, ".dta"))

}
# loop-local alias; remove it so the workspace ends with the same objects as before
rm(merged)

save(lobbyist.final,file="usr_gen/lobbyist_final.RData")

